#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/crc32c.h>
#include "compat.h"
-#include "crc32c.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "async-thread.h"
#include "locking.h"
-#include "ref-cache.h"
#include "tree-log.h"
#include "free-space-cache.h"
static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
+static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
+
/*
* end_io_wq structs are used to do processing in task context when an IO is
* complete. This is used during reads to verify checksums, and it is used
struct extent_map *em;
int ret;
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (em) {
em->bdev =
BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
goto out;
}
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
em = alloc_extent_map(GFP_NOFS);
if (!em) {
em->block_start = 0;
em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
if (ret == -EEXIST) {
u64 failed_start = em->start;
free_extent_map(em);
em = NULL;
}
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
if (ret)
em = ERR_PTR(ret);
u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
{
- return btrfs_crc32c(seed, data, len);
+ return crc32c(seed, data, len);
}
void btrfs_csum_final(u32 crc, char *result)
{
root->node = NULL;
root->commit_root = NULL;
- root->ref_tree = NULL;
root->sectorsize = sectorsize;
root->nodesize = nodesize;
root->leafsize = leafsize;
root->fs_info = fs_info;
root->objectid = objectid;
root->last_trans = 0;
- root->highest_inode = 0;
- root->last_inode_alloc = 0;
+ root->highest_objectid = 0;
root->name = NULL;
root->in_sysfs = 0;
+ root->inode_tree.rb_node = NULL;
INIT_LIST_HEAD(&root->dirty_list);
INIT_LIST_HEAD(&root->orphan_list);
- INIT_LIST_HEAD(&root->dead_list);
+ INIT_LIST_HEAD(&root->root_list);
spin_lock_init(&root->node_lock);
spin_lock_init(&root->list_lock);
+ spin_lock_init(&root->inode_lock);
mutex_init(&root->objectid_mutex);
mutex_init(&root->log_mutex);
init_waitqueue_head(&root->log_writer_wait);
extent_io_tree_init(&root->dirty_log_pages,
fs_info->btree_inode->i_mapping, GFP_NOFS);
- btrfs_leaf_ref_tree_init(&root->ref_tree_struct);
- root->ref_tree = &root->ref_tree_struct;
-
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
blocksize, generation);
+ root->commit_root = btrfs_root_node(root);
BUG_ON(!root->node);
return 0;
}
*/
root->ref_cows = 0;
- leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
- 0, BTRFS_TREE_LOG_OBJECTID,
- trans->transid, 0, 0, 0);
+ leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
+ BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0);
if (IS_ERR(leaf)) {
kfree(root);
return ERR_CAST(leaf);
}
+ memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
+ btrfs_set_header_bytenr(leaf, leaf->start);
+ btrfs_set_header_generation(leaf, trans->transid);
+ btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
+ btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
root->node = leaf;
- btrfs_set_header_nritems(root->node, 0);
- btrfs_set_header_level(root->node, 0);
- btrfs_set_header_bytenr(root->node, root->node->start);
- btrfs_set_header_generation(root->node, trans->transid);
- btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID);
write_extent_buffer(root->node, root->fs_info->fsid,
(unsigned long)btrfs_header_fsid(root->node),
inode_item->nbytes = cpu_to_le64(root->leafsize);
inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
- btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start);
- btrfs_set_root_generation(&log_root->root_item, trans->transid);
+ btrfs_set_root_node(&log_root->root_item, log_root->node);
WARN_ON(root->log_root);
root->log_root = log_root;
struct btrfs_fs_info *fs_info = tree_root->fs_info;
struct btrfs_path *path;
struct extent_buffer *l;
- u64 highest_inode;
u64 generation;
u32 blocksize;
int ret = 0;
kfree(root);
return ERR_PTR(ret);
}
- goto insert;
+ goto out;
}
__setup_root(tree_root->nodesize, tree_root->leafsize,
path = btrfs_alloc_path();
BUG_ON(!path);
ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
- if (ret != 0) {
- if (ret > 0)
- ret = -ENOENT;
- goto out;
+ if (ret == 0) {
+ l = path->nodes[0];
+ read_extent_buffer(l, &root->root_item,
+ btrfs_item_ptr_offset(l, path->slots[0]),
+ sizeof(root->root_item));
+ memcpy(&root->root_key, location, sizeof(*location));
}
- l = path->nodes[0];
- read_extent_buffer(l, &root->root_item,
- btrfs_item_ptr_offset(l, path->slots[0]),
- sizeof(root->root_item));
- memcpy(&root->root_key, location, sizeof(*location));
- ret = 0;
-out:
- btrfs_release_path(root, path);
btrfs_free_path(path);
if (ret) {
- kfree(root);
+ if (ret > 0)
+ ret = -ENOENT;
return ERR_PTR(ret);
}
+
generation = btrfs_root_generation(&root->root_item);
blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
blocksize, generation);
+ root->commit_root = btrfs_root_node(root);
BUG_ON(!root->node);
-insert:
- if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
+out:
+ if (location->objectid != BTRFS_TREE_LOG_OBJECTID)
root->ref_cows = 1;
- ret = btrfs_find_highest_inode(root, &highest_inode);
- if (ret == 0) {
- root->highest_inode = highest_inode;
- root->last_inode_alloc = highest_inode;
- }
- }
+
return root;
}
}
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
ret = btrfs_find_dead_roots(fs_info->tree_root,
- root->root_key.objectid, root);
+ root->root_key.objectid);
BUG_ON(ret);
btrfs_orphan_cleanup(root);
}
offset = page_offset(page);
em_tree = &BTRFS_I(inode)->extent_tree;
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
if (!em) {
__unplug_io_fn(bdi, page);
return;
free_extent_map(em);
}
+/*
+ * If this fails, caller must call bdi_destroy() to get rid of the
+ * bdi again.
+ */
static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
{
- bdi_init(bdi);
+ int err;
+
+ bdi->capabilities = BDI_CAP_MAP_COPY;
+ err = bdi_init(bdi);
+ if (err)
+ return err;
+
+ err = bdi_register(bdi, NULL, "btrfs-%d",
+ atomic_inc_return(&btrfs_bdi_num));
+ if (err)
+ return err;
+
bdi->ra_pages = default_backing_dev_info.ra_pages;
- bdi->state = 0;
- bdi->capabilities = default_backing_dev_info.capabilities;
bdi->unplug_io_fn = btrfs_unplug_io_fn;
bdi->unplug_io_data = info;
bdi->congested_fn = btrfs_congested_fn;
INIT_LIST_HEAD(&fs_info->hashers);
INIT_LIST_HEAD(&fs_info->delalloc_inodes);
INIT_LIST_HEAD(&fs_info->ordered_operations);
+ INIT_LIST_HEAD(&fs_info->caching_block_groups);
spin_lock_init(&fs_info->delalloc_lock);
spin_lock_init(&fs_info->new_trans_lock);
spin_lock_init(&fs_info->ref_cache_lock);
atomic_set(&fs_info->async_delalloc_pages, 0);
atomic_set(&fs_info->async_submit_draining, 0);
atomic_set(&fs_info->nr_async_bios, 0);
- atomic_set(&fs_info->throttles, 0);
- atomic_set(&fs_info->throttle_gen, 0);
fs_info->sb = sb;
fs_info->max_extent = (u64)-1;
fs_info->max_inline = 8192 * 1024;
- setup_bdi(fs_info, &fs_info->bdi);
+ if (setup_bdi(fs_info, &fs_info->bdi))
+ goto fail_bdi;
fs_info->btree_inode = new_inode(sb);
fs_info->btree_inode->i_ino = 1;
fs_info->btree_inode->i_nlink = 1;
fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi;
+ RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
fs_info->btree_inode->i_mapping,
GFP_NOFS);
spin_lock_init(&fs_info->block_group_cache_lock);
fs_info->block_group_cache_tree.rb_node = NULL;
- extent_io_tree_init(&fs_info->pinned_extents,
+ extent_io_tree_init(&fs_info->freed_extents[0],
+ fs_info->btree_inode->i_mapping, GFP_NOFS);
+ extent_io_tree_init(&fs_info->freed_extents[1],
fs_info->btree_inode->i_mapping, GFP_NOFS);
+ fs_info->pinned_extents = &fs_info->freed_extents[0];
fs_info->do_barriers = 1;
- INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
- btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree);
- btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree);
-
BTRFS_I(fs_info->btree_inode)->root = tree_root;
memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
sizeof(struct btrfs_key));
mutex_init(&fs_info->cleaner_mutex);
mutex_init(&fs_info->volume_mutex);
mutex_init(&fs_info->tree_reloc_mutex);
+ init_rwsem(&fs_info->extent_commit_sem);
btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
goto fail_iput;
}
+ features = btrfs_super_incompat_flags(disk_super);
+ if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
+ features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
+ btrfs_set_super_incompat_flags(disk_super, features);
+ }
+
features = btrfs_super_compat_ro_flags(disk_super) &
~BTRFS_FEATURE_COMPAT_RO_SUPP;
if (!(sb->s_flags & MS_RDONLY) && features) {
err = -EINVAL;
goto fail_iput;
}
-
+printk("thread pool is %d\n", fs_info->thread_pool_size);
/*
* we need to start all the end_io workers up front because the
* queue work function gets called at interrupt time, and so it
fs_info->endio_workers.idle_thresh = 4;
fs_info->endio_meta_workers.idle_thresh = 4;
- fs_info->endio_write_workers.idle_thresh = 64;
- fs_info->endio_meta_write_workers.idle_thresh = 64;
+ fs_info->endio_write_workers.idle_thresh = 2;
+ fs_info->endio_meta_write_workers.idle_thresh = 2;
+
+ fs_info->endio_workers.atomic_worker_start = 1;
+ fs_info->endio_meta_workers.atomic_worker_start = 1;
+ fs_info->endio_write_workers.atomic_worker_start = 1;
+ fs_info->endio_meta_write_workers.atomic_worker_start = 1;
btrfs_start_workers(&fs_info->workers, 1);
btrfs_start_workers(&fs_info->submit_workers, 1);
btrfs_start_workers(&fs_info->delalloc_workers, 1);
btrfs_start_workers(&fs_info->fixup_workers, 1);
- btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
- btrfs_start_workers(&fs_info->endio_meta_workers,
- fs_info->thread_pool_size);
- btrfs_start_workers(&fs_info->endio_meta_write_workers,
- fs_info->thread_pool_size);
- btrfs_start_workers(&fs_info->endio_write_workers,
- fs_info->thread_pool_size);
+ btrfs_start_workers(&fs_info->endio_workers, 1);
+ btrfs_start_workers(&fs_info->endio_meta_workers, 1);
+ btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
+ btrfs_start_workers(&fs_info->endio_write_workers, 1);
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
if (ret) {
printk(KERN_WARNING "btrfs: failed to read the system "
"array on %s\n", sb->s_id);
- goto fail_sys_array;
+ goto fail_sb_buffer;
}
blocksize = btrfs_level_size(tree_root,
btrfs_super_chunk_root(disk_super),
blocksize, generation);
BUG_ON(!chunk_root->node);
+ if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
+ printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
+ sb->s_id);
+ goto fail_chunk_root;
+ }
+ btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
+ chunk_root->commit_root = btrfs_root_node(chunk_root);
read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
(unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
blocksize, generation);
if (!tree_root->node)
goto fail_chunk_root;
-
+ if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
+ printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
+ sb->s_id);
+ goto fail_tree_root;
+ }
+ btrfs_set_root_node(&tree_root->root_item, tree_root->node);
+ tree_root->commit_root = btrfs_root_node(tree_root);
ret = find_and_setup_root(tree_root, fs_info,
BTRFS_EXTENT_TREE_OBJECTID, extent_root);
ret = find_and_setup_root(tree_root, fs_info,
BTRFS_DEV_TREE_OBJECTID, dev_root);
- dev_root->track_dirty = 1;
if (ret)
goto fail_extent_root;
+ dev_root->track_dirty = 1;
ret = find_and_setup_root(tree_root, fs_info,
BTRFS_CSUM_TREE_OBJECTID, csum_root);
if (ret)
- goto fail_extent_root;
+ goto fail_dev_root;
csum_root->track_dirty = 1;
if (IS_ERR(fs_info->transaction_kthread))
goto fail_cleaner;
+ if (!btrfs_test_opt(tree_root, SSD) &&
+ !btrfs_test_opt(tree_root, NOSSD) &&
+ !fs_info->fs_devices->rotating) {
+ printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD "
+ "mode\n");
+ btrfs_set_opt(fs_info->mount_opt, SSD);
+ }
+
if (btrfs_super_log_root(disk_super) != 0) {
u64 bytenr = btrfs_super_log_root(disk_super);
}
if (!(sb->s_flags & MS_RDONLY)) {
- ret = btrfs_cleanup_reloc_trees(tree_root);
+ ret = btrfs_recover_relocation(tree_root);
BUG_ON(ret);
}
fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
if (!fs_info->fs_root)
goto fail_trans_kthread;
+
return tree_root;
fail_trans_kthread:
fail_csum_root:
free_extent_buffer(csum_root->node);
+ free_extent_buffer(csum_root->commit_root);
+fail_dev_root:
+ free_extent_buffer(dev_root->node);
+ free_extent_buffer(dev_root->commit_root);
fail_extent_root:
free_extent_buffer(extent_root->node);
+ free_extent_buffer(extent_root->commit_root);
fail_tree_root:
free_extent_buffer(tree_root->node);
+ free_extent_buffer(tree_root->commit_root);
fail_chunk_root:
free_extent_buffer(chunk_root->node);
-fail_sys_array:
- free_extent_buffer(dev_root->node);
+ free_extent_buffer(chunk_root->commit_root);
fail_sb_buffer:
btrfs_stop_workers(&fs_info->fixup_workers);
btrfs_stop_workers(&fs_info->delalloc_workers);
btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
+fail_bdi:
bdi_destroy(&fs_info->bdi);
-
fail:
kfree(extent_root);
kfree(tree_root);
return latest;
}
+/*
+ * this should be called twice, once with wait == 0 and
+ * once with wait == 1. When wait == 0 is done, all the buffer heads
+ * we write are pinned.
+ *
+ * They are released when wait == 1 is done.
+ * max_mirrors must be the same for both runs, and it indicates how
+ * many supers on this one device should be written.
+ *
+ * max_mirrors == 0 means to write them all.
+ */
static int write_dev_supers(struct btrfs_device *device,
struct btrfs_super_block *sb,
int do_barriers, int wait, int max_mirrors)
bh = __find_get_block(device->bdev, bytenr / 4096,
BTRFS_SUPER_INFO_SIZE);
BUG_ON(!bh);
- brelse(bh);
wait_on_buffer(bh);
- if (buffer_uptodate(bh)) {
- brelse(bh);
- continue;
- }
+ if (!buffer_uptodate(bh))
+ errors++;
+
+ /* drop our reference */
+ brelse(bh);
+
+ /* drop the reference from the wait == 0 run */
+ brelse(bh);
+ continue;
} else {
btrfs_set_super_bytenr(sb, bytenr);
BTRFS_CSUM_SIZE);
btrfs_csum_final(crc, sb->csum);
+ /*
+ * one reference for us, and we leave it for the
+ * caller
+ */
bh = __getblk(device->bdev, bytenr / 4096,
BTRFS_SUPER_INFO_SIZE);
memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
- set_buffer_uptodate(bh);
+ /* one reference for submit_bh */
get_bh(bh);
+
+ set_buffer_uptodate(bh);
lock_buffer(bh);
bh->b_end_io = btrfs_end_buffer_write_sync;
}
device->name);
set_buffer_uptodate(bh);
device->barriers = 0;
+ /* one reference for submit_bh */
get_bh(bh);
lock_buffer(bh);
ret = submit_bh(WRITE_SYNC, bh);
ret = submit_bh(WRITE_SYNC, bh);
}
- if (!ret && wait) {
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh))
- errors++;
- } else if (ret) {
+ if (ret)
errors++;
- }
- if (wait)
- brelse(bh);
}
return errors < i ? 0 : -1;
}
int write_all_supers(struct btrfs_root *root, int max_mirrors)
{
- struct list_head *head = &root->fs_info->fs_devices->devices;
+ struct list_head *head;
struct btrfs_device *dev;
struct btrfs_super_block *sb;
struct btrfs_dev_item *dev_item;
sb = &root->fs_info->super_for_commit;
dev_item = &sb->dev_item;
+
+ mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
+ head = &root->fs_info->fs_devices->devices;
list_for_each_entry(dev, head, dev_list) {
if (!dev->bdev) {
total_errors++;
if (ret)
total_errors++;
}
+ mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
if (total_errors > max_errors) {
printk(KERN_ERR "btrfs: %d errors while writing supers\n",
total_errors);
int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
+ WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
radix_tree_delete(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid);
if (root->anon_super.s_dev) {
ARRAY_SIZE(gang));
if (!ret)
break;
+
+ root_objectid = gang[ret - 1]->root_key.objectid + 1;
for (i = 0; i < ret; i++) {
root_objectid = gang[i]->root_key.objectid;
ret = btrfs_find_dead_roots(fs_info->tree_root,
- root_objectid, gang[i]);
+ root_objectid);
BUG_ON(ret);
btrfs_orphan_cleanup(gang[i]);
}
printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
}
+ fs_info->closing = 2;
+ smp_mb();
+
if (fs_info->delalloc_bytes) {
printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
(unsigned long long)fs_info->delalloc_bytes);
(unsigned long long)fs_info->total_ref_cache_size);
}
- if (fs_info->extent_root->node)
- free_extent_buffer(fs_info->extent_root->node);
-
- if (fs_info->tree_root->node)
- free_extent_buffer(fs_info->tree_root->node);
-
- if (root->fs_info->chunk_root->node)
- free_extent_buffer(root->fs_info->chunk_root->node);
-
- if (root->fs_info->dev_root->node)
- free_extent_buffer(root->fs_info->dev_root->node);
-
- if (root->fs_info->csum_root->node)
- free_extent_buffer(root->fs_info->csum_root->node);
+ free_extent_buffer(fs_info->extent_root->node);
+ free_extent_buffer(fs_info->extent_root->commit_root);
+ free_extent_buffer(fs_info->tree_root->node);
+ free_extent_buffer(fs_info->tree_root->commit_root);
+ free_extent_buffer(root->fs_info->chunk_root->node);
+ free_extent_buffer(root->fs_info->chunk_root->commit_root);
+ free_extent_buffer(root->fs_info->dev_root->node);
+ free_extent_buffer(root->fs_info->dev_root->commit_root);
+ free_extent_buffer(root->fs_info->csum_root->node);
+ free_extent_buffer(root->fs_info->csum_root->commit_root);
btrfs_free_block_groups(root->fs_info);
* looks as though older kernels can get into trouble with
* this code, they end up stuck in balance_dirty_pages forever
*/
- struct extent_io_tree *tree;
u64 num_dirty;
- u64 start = 0;
unsigned long thresh = 32 * 1024 * 1024;
- tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
if (current->flags & PF_MEMALLOC)
return;
- num_dirty = count_range_bits(tree, &start, (u64)-1,
- thresh, EXTENT_DIRTY);
+ num_dirty = root->fs_info->dirty_metadata_bytes;
+
if (num_dirty > thresh) {
balance_dirty_pages_ratelimited_nr(
root->fs_info->btree_inode->i_mapping, 1);